# This file is a mixture that allows for re-specifying the agent_data rather than taking it from the archive
# and also has product_data outside of the pyblp.Problem, which allows for a merge later with new characteristic
# At this stage simply draw 200 MC consumers and compare to col2. 
#
import pyblp
import numpy as np
import pandas as pd

#product data:
product_data = pd.read_csv(pyblp.data.BLP_PRODUCTS_LOCATION) # 2217*33
product_data.head()
product_data.count()

#code to add on the domestic dummy
domestic = pd.read_csv('Data/BLP99/domestic.csv')
domestic.head()
product_data = product_data.merge(domestic, how='left', on='car_ids')
product_data.count()  
#note that domestic is last (after the instruments)
# replacing the instruments that come inside of product_data
#  with "built" Gandhi Houde instruments

demand_instruments_dif = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + hpwt + air + mpd + space + domestic'),
    product_data
)
type(demand_instruments_dif) # it is a numpy array
# delete the 8 pre-existing demand iv
for i in range(8):
    del product_data[f'demand_instruments{i}']
for i, column in enumerate(demand_instruments_dif.T):
    product_data[f'demand_instruments{i}'] = column
product_data.count()  

supply_instruments = np.c_[
    pyblp.build_blp_instruments(pyblp.Formulation('1 + log(hpwt) + air + log(mpg) + log(space) + domestic'), product_data),
    pyblp.build_blp_instruments(pyblp.Formulation('0 + trend'), product_data)[:, 0],
    product_data['mpd'],
]
#delete the old supply IV
for i in range(12):
    del product_data[f'supply_instruments{i}']
for i, column in enumerate(supply_instruments.T):
    product_data[f'supply_instruments{i}'] = column


# construct agent data from primitives
sds = pd.read_csv('Data/BLP99/sdincome.csv', header=None, names=['sd'], usecols=[0])
means = pd.read_csv('Data/BLP99/meanincome.csv', header=None, names=['year', 'mean'], usecols=[0, 1])
market_ids = []
weights = []
nodes = []
income = []
for index, (_, row) in enumerate(means.iterrows()):
    integration = pyblp.Integration('halton', 10_000, {
        'discard': 1000 + index * 10_000,
        'seed': index,
    })    
    untransformed_agents = pyblp.build_integration(integration, 7) #≠6
    market_ids.append(np.repeat(1900 + int(row['year']), untransformed_agents.weights.size))
    weights.append(untransformed_agents.weights)
    nodes.append(untransformed_agents.nodes[:, :-1])
    income.append(np.exp(row['mean'] + float(sds['sd']) * untransformed_agents.nodes[:, -1]))

# structure the problem
product_formulations = (
   pyblp.Formulation('1 + hpwt + air + mpd + space + domestic'), # added domestic
   pyblp.Formulation('1 + prices + hpwt + air + mpd + space + domestic'), # added domestic
   pyblp.Formulation('1 + log(hpwt) + air + log(mpg) + log(space) + trend + domestic')
)
product_formulations
agent_formulation = pyblp.Formulation('0 + I(1 / income)')
agent_formulation
problem = pyblp.Problem(product_formulations, product_data, agent_formulation,agent_data={
        'market_ids': np.concatenate(market_ids),
        'weights': np.concatenate(weights),
        'nodes': np.vstack(nodes),
        'income': np.concatenate(income),
    }, 
    costs_type='log')
problem
initial_sigma = np.diag([3.612, 0, 4.628, 1.818, 1.050, 2.056, 1]) # added 1
initial_sigma
initial_pi = np.c_[[0, -43.501, 0, 0, 0, 0, 0]] # added 0
initial_pi

# solve with differentiation instruments for demand, regular IV for supply
results = problem.solve(
    initial_sigma,
    initial_pi,
    costs_bounds=(0.001, None),
    W_type='clustered',
    se_type='clustered',
    initial_update=True,
    iteration=pyblp.Iteration('squarem', {'atol': 1e-14, 'max_evaluations': 1000}),
    scale_objective = False,
    optimization=pyblp.Optimization('bfgs', {'gtol': 1e-5, 'maxiter': 1000}),
    method='1s',
)
results
elasticities = results.compute_elasticities()
means = results.extract_diagonal_means(elasticities)
np.mean(means) #
np.savetxt('Output/DomesticDifIV/own_elasticities_halton10k_domestic_1s.csv',means, delimiter=",")
np.savetxt('Output/DomesticDifIV/parameters_halton10k_domestic_1s.csv',results.parameters, fmt="%6.3f", delimiter=",")
par_cov = np.diagonal(results.parameter_covariances)
np.savetxt('Output/DomesticDifIV/par_cov_halton10k_domestic_1s.csv',par_cov, fmt="%6.3f", delimiter=",")

#skip optimal instruments since the dif IV are supposed to be good. i think CGLT used them
